{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8204ce81",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "from pyspark.sql import Row\n",
    "from pyspark import SparkContext"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3e259151",
   "metadata": {},
   "outputs": [],
   "source": [
    "sc = SparkContext(\"local\",\"My app\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8c94280e",
   "metadata": {},
   "outputs": [],
   "source": [
    "sqlContext = SparkSession.builder.getOrCreate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "25054707",
   "metadata": {},
   "outputs": [],
   "source": [
    "book_rdd = sc.textFile(\"file:///home/demos/实验数据/book.txt\")\n",
    "book_rdd=book_rdd.map(lambda x:x.split(\",\")).map(lambda x: Row(id=x[0],name=x[1],rating=x[2],price=x[3],publish=x[4],url=x[5]))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "6b005dbf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Row(id='序号', name='书名', rating='评分', price='价格', publish='出版社', url='url'),\n",
       " Row(id='5173', name='動力取向精神醫學--臨床應用與實務', rating='10.0 ', price='1200元', publish='心灵工坊', url='https://book.douban.com/subject/6053667/'),\n",
       " Row(id='9929', name='水彩绘森活', rating='10.0 ', price='29.8', publish='人民邮电出版社', url='https://book.douban.com/subject/26115807/'),\n",
       " Row(id='10124', name='殷周金文集成(修订增补本共8册)(精)', rating='10.0 ', price='2400.00元', publish='中华书局', url='https://book.douban.com/subject/2235855/'),\n",
       " Row(id='16628', name='纸雕游戏大书', rating='10.0 ', price='99.00元', publish='重庆出版集团', url='https://book.douban.com/subject/26673804/')]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "book_rdd.take(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "6fe6966f",
   "metadata": {},
   "outputs": [],
   "source": [
    "header = book_rdd.first()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "473ccfed",
   "metadata": {},
   "outputs": [],
   "source": [
    "book_rdd = book_rdd.filter(lambda row:row!=header)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "eb765558",
   "metadata": {},
   "outputs": [],
   "source": [
    "book_df = sqlContext.createDataFrame(book_rdd,[\"id\",\"rating\",\"price\",\"publish\",\"url\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "688f3c9d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Row(id='5173', rating='動力取向精神醫學--臨床應用與實務', price='10.0 ', publish='1200元', url='心灵工坊', url='https://book.douban.com/subject/6053667/')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "book_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "faebec65",
   "metadata": {},
   "outputs": [],
   "source": [
    "book_df.registerTempTable(\"book_sale\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d096c986",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-----------------------------------+\n",
      "|                             rating|\n",
      "+-----------------------------------+\n",
      "|   動力取向精神醫學--臨床應用與實務|\n",
      "|                         水彩绘森活|\n",
      "|  殷周金文集成(修订增补本共8册)(精)|\n",
      "|                       纸雕游戏大书|\n",
      "|                       Michelangelo|\n",
      "|                  一支笔的快乐涂鸦2|\n",
      "|                         亲亲宝贝装|\n",
      "|                     Photoshop7解像|\n",
      "|                   戚蓼生序本石头记|\n",
      "|                      宇宙兄弟（7）|\n",
      "|                      宇宙兄弟（8）|\n",
      "|            TCP\\IP详解（卷1英文版）|\n",
      "|计算机程序设计艺术卷1：基本算法(...|\n",
      "|         微积分和数学分析引论-第1卷|\n",
      "|               PrinciplesofNeura...|\n",
      "|           奈特人体神经解剖彩色图谱|\n",
      "|                 数学、科学和认识论|\n",
      "|                       零基础学素描|\n",
      "|     黑白花意3：300例超写实的花之绘|\n",
      "|         黑白画意：经典植物手绘教程|\n",
      "+-----------------------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sqlContext.sql(\"select rating from book_sale\").show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "f0dcc4c4",
   "metadata": {},
   "outputs": [
    {
     "ename": "AnalysisException",
     "evalue": "Reference 'url' is ambiguous, could be: book_sale.url, book_sale.url.; line 1 pos 7",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAnalysisException\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-12-4b57953067ad>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msqlContext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msql\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"select url from book_sale\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/.virtualenvs/spark/lib/python3.6/site-packages/pyspark/sql/session.py\u001b[0m in \u001b[0;36msql\u001b[0;34m(self, sqlQuery)\u001b[0m\n\u001b[1;32m    721\u001b[0m         \u001b[0;34m[\u001b[0m\u001b[0mRow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'row1'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'row2'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'row3'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    722\u001b[0m         \"\"\"\n\u001b[0;32m--> 723\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jsparkSession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msql\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msqlQuery\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_wrapped\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    724\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    725\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtableName\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.virtualenvs/spark/lib/python3.6/site-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m   1303\u001b[0m         \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1304\u001b[0m         return_value = get_return_value(\n\u001b[0;32m-> 1305\u001b[0;31m             answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[1;32m   1306\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1307\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.virtualenvs/spark/lib/python3.6/site-packages/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m    115\u001b[0m                 \u001b[0;31m# Hide where the exception came from that shows a non-Pythonic\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    116\u001b[0m                 \u001b[0;31m# JVM exception message.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 117\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mconverted\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    118\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    119\u001b[0m                 \u001b[0;32mraise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mAnalysisException\u001b[0m: Reference 'url' is ambiguous, could be: book_sale.url, book_sale.url.; line 1 pos 7"
     ]
    }
   ],
   "source": [
    "sqlContext.sql(\"select url from book_sale\").show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "653a1a75",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-----+-----------------------------------+\n",
      "|   id|                             rating|\n",
      "+-----+-----------------------------------+\n",
      "| 5173|   動力取向精神醫學--臨床應用與實務|\n",
      "| 9929|                         水彩绘森活|\n",
      "|10124|  殷周金文集成(修订增补本共8册)(精)|\n",
      "|16628|                       纸雕游戏大书|\n",
      "|19103|                       Michelangelo|\n",
      "|20063|                  一支笔的快乐涂鸦2|\n",
      "|32781|                         亲亲宝贝装|\n",
      "|32879|                     Photoshop7解像|\n",
      "|45687|                   戚蓼生序本石头记|\n",
      "|52504|                      宇宙兄弟（7）|\n",
      "|52505|                      宇宙兄弟（8）|\n",
      "|  573|            TCP\\IP详解（卷1英文版）|\n",
      "|  589|计算机程序设计艺术卷1：基本算法(...|\n",
      "| 5522|         微积分和数学分析引论-第1卷|\n",
      "| 5547|               PrinciplesofNeura...|\n",
      "| 7443|           奈特人体神经解剖彩色图谱|\n",
      "| 8703|                 数学、科学和认识论|\n",
      "| 9924|                       零基础学素描|\n",
      "| 9926|     黑白花意3：300例超写实的花之绘|\n",
      "| 9927|         黑白画意：经典植物手绘教程|\n",
      "+-----+-----------------------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sqlContext.sql(\"select id, rating from book_sale\").show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ef30498",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
